Mental Illness

Load and clean data

mental_df = 
  read_csv("./data/mental_data.csv") %>% 
  janitor::clean_names() %>% 
  mutate(
    any_mental_num = any_mental_num / 1000000,
    any_mental_per = any_mental_per * 100,
    ser_mental_num = ser_mental_num / 1000000,
    ser_mental_per = ser_mental_per * 100,
    state_abb = state.abb[match(state, state.name)],
    region = state.region[match(state, state.name)]
  ) %>% 
  mutate(
    state_abb = replace(state_abb, state == "District of Columbia", "DC"))
## Rows: 51 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (9): any_mental_per, any_mental_num, ser_mental_per, ser_mental_num, tee...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Map: Percent of adults reporting any mental illness by state between 2019-2020

state_mental=
  plot_usmap(
    data = mental_df,
    regions = "state",
    values = "any_mental_per", 
    labels = TRUE, label_color = "white") +
  labs(
    title = "Percent of adults reporting any mental illness for each state, 2019-2022"
  ) +
  scale_fill_continuous(
    name = "Mental illness percent (%)",
    label = scales::comma) +
  theme(legend.position = "right")

ggplotly(state_mental)

According to the mental health data collected between 2019 -2020, the mental illness percents are high in the US overal, with variations between states.

Any/Serious Mental illness numbers (million), by region, 2019-2020

any_mental_plot = 
  mental_df %>% 
    group_by(region) %>%
    drop_na() %>% 
    summarize(any_mental_num = sum(any_mental_num)) %>% 
    ggplot(
      aes(x = region, y = any_mental_num, fill = region)) +
    geom_bar(stat = "identity") +
    labs(
      title = "Any Mental Illness Number, by Region, 2019-2020",
      x = "Region",
      y = "Mental illness number (million)",
      fill = "Region") +
  theme(legend.position = "bottom")

ser_mental_plot =
  mental_df %>% 
    group_by(region) %>%
    drop_na() %>% 
    summarize(ser_mental_num = sum(ser_mental_num)) %>% 
    ggplot(
      aes(x = region, y = ser_mental_num, fill = region)) +
    geom_bar(stat = "identity") +
    labs(
      title = "Serious Mental Illness Number, by Region, 2019-2020",
      x = "Region",
      y = "Mental illness number (million)",
      fill = "Region") +
    theme(legend.position = "bottom")

grid.arrange(any_mental_plot, ser_mental_plot, ncol =2) 

Comment Both any mental illness and serious mental illness are highest in the South, lowest in the northeast.

Any/Serious Mental illness percent, Top 10 states, 2019-2020

any_top10_plot =
  mental_df %>% 
    filter(row_number(desc(any_mental_per)) <= 10) %>% 
    mutate(
      state = fct_reorder(state, any_mental_per)
    ) %>% 
    ggplot(
      aes(x = any_mental_per, y = state, fill = state)) +
      geom_bar(stat = "identity") +
      labs(
        title = "Any Mental Illness Percent, Top 10 States",
        x = "Any Mental illness percent (%)",
        y = "State",
        fill = "State") +
    theme(legend.position = "bottom")

ser_top10_plot =
  mental_df %>% 
    filter(row_number(desc(ser_mental_per)) <= 10) %>% 
    mutate(
      state = fct_reorder(state, ser_mental_per)
    ) %>% 
    ggplot(
      aes(x = ser_mental_per, y = state, fill = state)) +
      geom_bar(stat = "identity") +
      labs(
        title = "Serious Mental Illness Percent, Top 10 States",
        x = "Serious mental illness percent (%)",
        y = "State",
      fill = "State") +
    theme(legend.position = "bottom")

grid.arrange(any_top10_plot, ser_top10_plot, ncol =2) 

Comment

The top 10 states for any and serious mental illness are 8/10 the same, except Washington, Rhode Island, Arkansas and Indiana. Ultah has the highest any/serious mental illness percent.

Depression and anxiety

data cleaning

nhis = 
  read_csv("data/nhis_data01.csv") %>% 
  janitor::clean_names() %>% 
  mutate(
    worrx = recode_factor(worrx,'1' = "no", '2' = "yes"),
    worfreq = recode_factor(worfreq, '1' = "Daily", '2' = "Weekly", 
                            '3' = "Monthly", '4' = "A few times a year", 
                            '5' = "Never"),
    worfeelevl = recode_factor(worfeelevl, '1' = "A lot", 
                               '3' = "Somewhere between a little and a lot", 
                               '2' = "A little"),
    deprx = recode_factor(deprx, '1' = "no", '2' = "yes"),
    depfreq = recode_factor(depfreq, '1' = "Daily", '2' = "Weekly", 
                            '3' = "Monthly", '4' = "A few times a year", 
                            '5' = "Never"),
    depfeelevl = recode_factor(depfeelevl, '1' = "A lot", 
                               '3' = "Somewhere between a little and a lot", 
                               '2' = "A little")
  )
## Rows: 468212 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): NHISHID, NHISPID, HHX, FMX, PX
## dbl (28): YEAR, SERIAL, STRATA, PSU, HHWEIGHT, PERNUM, PERWEIGHT, SAMPWEIGHT...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
recent_yr_df = 
  nhis %>% 
  filter(year>2019)

Worries and anxiety

number of people reported taken medication for worried, nervous, or anxious feeings from 2015 to 2021.

nhis %>% 
  filter(year>=2015) %>%
  drop_na(worrx) %>% 
  group_by(year, worrx) %>% 
  summarize(wor_num = n()) %>% 
  pivot_wider(
    names_from = worrx,
    values_from = wor_num
  ) %>% 
  mutate(
    wor_percentage = yes/(no + yes)*100,
    text_label = str_c(yes, " out of ", no + yes)
  ) %>% 
  plot_ly(
    y = ~wor_percentage,
    x = ~year,
    color = ~year,
    type = "bar", 
    colors = "viridis",
    text = ~text_label) %>% 
  layout(title = "Percentage of people reported taken medication for worried, nervous, or anxious feeings each year",
         xaxis = list (title = ""),
         yaxis = list (title = "Percentage")
         ) %>% 
  hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Frequency of worries

nhis %>% 
  filter(year>=2015) %>% 
  drop_na(worfreq) %>% 
  group_by(year, worfreq) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  summarize(
     percentage=100 * count/sum(count),
     sum_count = sum(count),
     worfreq = worfreq,
     count=count
  ) %>% 
  mutate(
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfreq,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

worry level distribution.

nhis %>% 
  filter(year>=2015) %>%
  drop_na(worfeelevl) %>% 
  group_by(year, worfeelevl) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  summarize(
     percentage=100 * count/sum(count),
     sum_count = sum(count),
     worfeelevl = worfeelevl,
     count=count
  ) %>% 
  mutate(
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfeelevl,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    title = "Distribution of worry levels",
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

logistic regression

whether taken medication for worried, nervous, or anxious feelings is associated with covid-19 adjusting for sex and family income level.

glm(worrx ~ sex + poverty + cvddiag, 
                            family=binomial(link='logit'),
                            data = recent_yr_df) %>% 
    broom::tidy(conf.int = TRUE) %>% 
    mutate(
      odds_ratio = exp(estimate),
      low = exp(conf.low),
      high = exp(conf.high)
    ) %>% 
    select(term, odds_ratio, low, high) %>% 
  knitr::kable(digits=2)
term odds_ratio low high
(Intercept) 0.08 0.07 0.09
sex 2.09 1.98 2.20
poverty 0.98 0.98 0.99
cvddiag 1.07 1.03 1.12

Depression

number of people reported taken medication for depression from 2015 to 2021.

nhis %>% 
  filter(year>=2015) %>% 
  drop_na(deprx) %>%
  group_by(year, deprx) %>% 
  summarize(dep_num = n()) %>% 
  pivot_wider(
    names_from = deprx,
    values_from = dep_num
  ) %>% 
  mutate(
    dep_percentage = yes/(no + yes)*100,
    text_label = str_c(yes, " out of ", no + yes)
  ) %>% 
  plot_ly(
    y = ~dep_percentage,
    x = ~year,
    color = ~year,
    type = "bar", 
    colors = "viridis",
    text = ~text_label) %>% 
  layout(title = "Percentage of people reported taken medication for depression each year",
         xaxis = list (title = ""),
         yaxis = list (title = "Percentage")
         ) %>% 
  hide_colorbar()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Frequency of feel depressed

nhis %>% 
  filter(year>=2015) %>%
  drop_na(depfreq) %>% 
  group_by(year, depfreq) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  summarize(
     percentage=100 * count/sum(count),
     sum_count = sum(count),
     depfreq = depfreq,
     count=count
  ) %>% 
  mutate(
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~depfreq,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    title = "Distribution of frequency of feel depressed",
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

depression level distribution.

nhis %>% 
  filter(year>=2015) %>%
  drop_na(depfeelevl) %>% 
  group_by(year, depfeelevl) %>% 
  summarize(count = n()) %>% 
  group_by(year) %>% 
  mutate(
    percentage = 100 * count/sum(count),
    sum_count = sum(count),
    text_label = str_c(count, " out of ", sum_count)
  ) %>% 
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~depfeelevl,
    type = "bar", 
    colors = "viridis",
    text = ~text_label
  ) %>% 
  layout(
    title = "Distribution of depression levels",
    xaxis = list (title = ""),
    yaxis = list (title = "Percentage"), 
    barmode = 'stack',
    legend = list(orientation = 'h')
  )
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

logistic regression

whether taken medication for depression is associated with covid-19 adjusting for sex and family income level.

glm(deprx ~ sex + poverty + cvddiag, 
                            family=binomial(link='logit'),
                            data = recent_yr_df) %>% 
    broom::tidy(conf.int = TRUE) %>% 
    mutate(
      odds_ratio = exp(estimate),
      low = exp(conf.low),
      high = exp(conf.high)
    ) %>% 
    select(term, odds_ratio, low, high) %>% 
  knitr::kable(digits=2)
term odds_ratio low high
(Intercept) 0.08 0.07 0.09
sex 2.08 1.97 2.20
poverty 0.97 0.97 0.98
cvddiag 1.05 1.00 1.10